library(tidyverse)
library(haven)
library(mice)
library(ggplot2)
library(lubridate)


df0 <- read_sav("D:\\UKBdata\\UKBB50万人的所有数据\\ukb baseline+随访.sav")

summary(df0$instance1_date)
# 计算每个日期列的中位数（跳过NA）
median_date0 <- median(df0$instance0_date, na.rm = TRUE)
median_date1 <- median(df0$instance1_date, na.rm = TRUE)

# 计算差值（天数），转换为年
date_diff <- median_date1 - median_date0
year_diff <- as.numeric(date_diff) / 365.25

# 存储结果
print(year_diff)

# 计算日期差异（天数）
date_diff <- df0$instance1_date - df0$instance0_date

# 转换为年数（假设每年365.25天）
year_diff <- as.numeric(date_diff) / 365.25

# 计算中位数（自动跳过NA）
median_year_diff <- median(year_diff, na.rm = TRUE)

print(median_year_diff)


#colnames(df0) %>% dput

df <- read_csv("D:\\UKBdata\\UKBB50万人的所有数据\\20240608.csv")

df1 <- left_join(df0,df,by = "eid")

#save(df1,file = "./data/df1.Rdata")
#load("D:\\UKBdata\\dputdata\\df1.Rdata")

df11122 <- read_csv("D:\\UKBdata\\UKBB50万人的所有数据\\gzh_muscle.csv")
summary(df11122$`Arm fat-free mass (left) | Instance 1`)
summary(df11122$`Arm fat-free mass (left) | Instance 2`)
summary(df11122$`Arm fat-free mass (left) | Instance 3`)
# 加载必要的库
library(data.table)
df111 <- read_csv("D:\\UKBdata\\UKBB50万人的所有数据\\result.csv")
class(df111)

head(df111)

library(lubridate)

# 转换日期
df111$Disease_Date <- ymd(df111$Disease_Date)

# 检查结果
class(df111$Disease_Date)

ca1 <- read_csv("D:\\UKBdata\\UKBB50万人的所有数据\\gzh_ca1.csv")


# 加载必要的包
library(dplyr)
library(stringr)
library(future)
library(future.apply)

# 设置并行化策略
plan(multisession, workers = availableCores() - 1)

# 定义 ICD-10 疾病分类映射
icd10_mapping <- list(
  ALD = c("^K70"),
  Viral_Hepatitis = c("^B16", "^B17", "^B18", "^B19"),
  Autoimmune_Liver_Disease = c("^K83\\.0A", "^K83\\.0F", "^K74\\.3", "^K75\\.4"),
  Hemochromatosis = c("^E83\\.1"),
  Wilson_Disease = c("^E83\\.0B"),
  Alpha_1_Antitrypsin_Deficiency = c("^E88\\.0A", "^E88\\.0B"),
  Budd_Chiari = c("^I82\\.0", "^K76\\.5"),
  Chronic_Hepatitis_Unspecified = c("^K73\\.9", "^K73\\.2"),
  Secondary_Or_Unspecified_Biliary_Cirrhosis = c("^K74\\.4", "^K74\\.5"),
  Alcohol_Use_Disorder = c("^F10"),
  Somatic_Consequences_Of_Alcohol = c("^E24\\.4", "^G62\\.1", "^I42\\.6", "^K29\\.2", "^G31\\.2", 
                                      "^G72\\.1", "^K85\\.2", "^K86\\.0", "^T51\\.0", "^T51\\.9", 
                                      "^X65", "^Y57\\.3", "^Z50\\.2", "^Z71\\.4", "^Z72\\.1"),
  Drug_Use_Disorder_Except_Nicotine_Caffeine = c("^F11", "^F12", "^F13", "^F14", "^F16", "^F18", "^F19")
)

# 将映射转换为长格式
icd10_lookup <- stack(icd10_mapping)

# 确保类别名为字符型
icd10_lookup$ind <- as.character(icd10_lookup$ind)

# 匹配疾病的函数
match_disease <- function(description) {
  matched_categories <- icd10_lookup$ind[sapply(icd10_lookup$values, function(code) str_detect(description, code))]
  if (length(matched_categories) > 0) {
    return(matched_categories)
  } else {
    return(NA)
  }
}

# 使用并行计算匹配疾病分类
df_with_diseases <- df111 %>%
  mutate(
    Disease_Category = future_sapply(Disease_Description, match_disease, future.seed = TRUE)
  ) %>%
  unnest(Disease_Category) # 展开疾病分类

# 提取每个疾病最早日期，并检查匹配
disease_summary <- df_with_diseases %>%
  group_by(eid, Disease_Category) %>%
  summarize(
    First_Disease_Date = min(as.Date(Disease_Date), na.rm = TRUE), # 获取最早日期
    Has_Disease = TRUE, # 标记患病
    .groups = "drop"
  ) %>%
  pivot_wider(
    names_from = Disease_Category,
    values_from = c(First_Disease_Date, Has_Disease),
    names_sep = "_"
  )

# 填充缺失的标记列
disease_summary <- disease_summary %>%
  mutate(across(starts_with("Has_Disease"), ~ ifelse(is.na(.), FALSE, .)))

# 合并原始数据
final_result <- df111 %>%
  select(eid) %>%
  distinct() %>%
  left_join(disease_summary, by = "eid")

colnames(final_result) %>% dput()

# 删除包含 NA 的列
final_result <- final_result %>%
  select(-starts_with("First_Disease_Date_NA"), -starts_with("Has_Disease_NA"))

# 新增一列，判断是否患任何疾病
final_result <- final_result %>%
  mutate(
    Any_Disease = if_else(rowSums(select(., starts_with("Has_Disease_"))) > 0, 1, 0)
  )

table(final_result$Any_Disease)

# 重命名数据框
other_liver_disease <- final_result

# 新增一列，计算最早的诊断时间（如果患病）
other_liver_disease <- other_liver_disease %>%
  rowwise() %>%
  mutate(
    Earliest_Diagnosis_Date = if_else(
      Any_Disease == 1, # 如果有患病
      min(c_across(starts_with("First_Disease_Date_")), na.rm = TRUE), # 找到最早日期
      as.Date(NA) # 否则为空
    )
  ) %>%
  ungroup() # 取消行级操作


# 保存结果
write.csv(other_liver_disease, "D:\\UKBdata\\UKBB50万人的所有数据\\other_liver_disease.csv", row.names = FALSE)

# 清除并行策略
plan(sequential)


#从这里开始
other_liver_disease <- read_csv("D:\\UKBdata\\UKBB50万人的所有数据\\other_liver_disease.csv")

drink<- read.csv("D:\\UKBdata\\UKBB50万人的所有数据\\gzh_drink_walk.csv")
table(drink$Frequency.of.consuming.six.or.more.units.of.alcohol)

drink111<- read.csv("D:\\UKBdata\\UKBB50万人的所有数据\\gzh_drink1.csv")
table(drink111$Alcohol.drinker.status...Instance.0)

education111<- read.csv("D:\\UKBdata\\UKBB50万人的所有数据\\gzh_education.csv")
table(education111$Qualifications...Instance.0)

education111 <- education111 %>%
  mutate(education_category = case_when(
    # 基础教育：包括中学、高中及其相关资格
    grepl("A levels|AS levels|O levels|GCSEs|CSEs", Qualifications...Instance.0) ~ "Basic Education",
    
    # 高等教育：包括大学学位及其相关资格，如本科、硕士、NVQ高层次等
    grepl("College or University degree|NVQ|HND|HNC", Qualifications...Instance.0) ~ "Higher Education",
    
    # 对于包含“Other professional qualifications”的情况，归入“Higher Education”
    grepl("Other professional qualifications", Qualifications...Instance.0) ~ "Higher Education", 
    
    # 其他类别：无法匹配到基础或高等教育的情况
    TRUE ~ "Other"
  ))

# 查看分类后的数据
table(education111$education_category)


education123 <- education111 %>%
  mutate(education_category = case_when(
    # 高等教育：包括大学学位
    grepl("College or University degree", Qualifications...Instance.0) ~ "College or University degree",
    
    # 其他：基础教育和职业资格
    TRUE ~ "Other levels"
  ))

# 查看分类后的结果
table(education123$education_category)

education_state <- education123 %>%
  select(Participant.ID, 
         education_category) %>%
  rename(eid = Participant.ID)

drink_state1 <- drink111 %>%
  select(Participant.ID, 
         `Alcohol.drinker.status...Instance.0`, 
         `Alcohol.drinker.status...Instance.1`, 
         `Alcohol.drinker.status...Instance.2`, 
         `Alcohol.drinker.status...Instance.3`, 
         `Alcohol.intake.frequency....Instance.0`, 
         `Alcohol.intake.frequency....Instance.1`, 
         `Alcohol.intake.frequency....Instance.2`, 
         `Alcohol.intake.frequency....Instance.3`) %>%
  rename(eid = Participant.ID) %>% 
  rename(alcohol_new0 = Alcohol.drinker.status...Instance.0) %>% 
  rename(alcohol_new1 = Alcohol.drinker.status...Instance.1) 
drink_state <- drink_state1 %>%
  mutate(alcohol_new0 = ifelse(is.na(alcohol_new0) | alcohol_new0 == "Prefer not to answer", "Never", alcohol_new0),
         alcohol_new1 = ifelse(is.na(alcohol_new1) | alcohol_new1 == "Prefer not to answer", "Never", alcohol_new1))
table(drink_state$alcohol_new0)
# 替换空格为 "Never"
drink_state <- drink_state %>%
  mutate(alcohol_new0 = ifelse(alcohol_new0 == "", "Never", alcohol_new0),
         alcohol_new1 = ifelse(alcohol_new1 == "", "Never", alcohol_new1))


table(drink_state$alcohol_new0)

colnames(drink_state) %>% dput()

df223 <- left_join(df1,other_liver_disease,by = "eid")

df22 <- left_join(df223,drink_state,by = "eid")

blood1<- read.csv("D:\\UKBdata\\UKBB50万人的所有数据\\gzh_blood1.csv")
blood2<- read.csv("D:\\UKBdata\\UKBB50万人的所有数据\\gzh_blood2.csv")
blood3<- read.csv("D:\\UKBdata\\UKBB50万人的所有数据\\gzh_blood3.csv")
blood4<- read.csv("D:\\UKBdata\\UKBB50万人的所有数据\\gzh_blood4.csv")
colnames(blood1) %>% dput()
colnames(blood2) %>% dput()
colnames(blood3) %>% dput()
colnames(blood4) %>% dput()
# Blood1的列名
colnames(blood1) <- c("Participant.ID", 
                      "Basophil.count.Instance.0", "Basophil.count.Instance.1", "Basophil.count.Instance.2", 
                      "Basophil.percentage.Instance.0", "Basophil.percentage.Instance.1", "Basophil.percentage.Instance.2", 
                      "Eosinophil.count.Instance.0", "Eosinophil.count.Instance.1", "Eosinophil.count.Instance.2", 
                      "Eosinophil.percentage.Instance.0", "Eosinophil.percentage.Instance.1", "Eosinophil.percentage.Instance.2", 
                      "Haematocrit.percentage.Instance.0", "Haematocrit.percentage.Instance.1", "Haematocrit.percentage.Instance.2", 
                      "Haemoglobin.concentration.Instance.0", "Haemoglobin.concentration.Instance.1", "Haemoglobin.concentration.Instance.2", 
                      "High.light.scatter.reticulocyte.count.Instance.0", "High.light.scatter.reticulocyte.count.Instance.1", "High.light.scatter.reticulocyte.count.Instance.2", 
                      "High.light.scatter.reticulocyte.percentage.Instance.0", "High.light.scatter.reticulocyte.percentage.Instance.1", "High.light.scatter.reticulocyte.percentage.Instance.2")

# Blood2的列名
colnames(blood2) <- c("Participant.ID", 
                      "Immature.reticulocyte.fraction.Instance.0", "Immature.reticulocyte.fraction.Instance.1", "Immature.reticulocyte.fraction.Instance.2", 
                      "Lymphocyte.count.Instance.0", "Lymphocyte.count.Instance.1", "Lymphocyte.count.Instance.2", 
                      "Lymphocyte.percentage.Instance.0", "Lymphocyte.percentage.Instance.1", "Lymphocyte.percentage.Instance.2", 
                      "Mean.corpuscular.haemoglobin.Instance.0", "Mean.corpuscular.haemoglobin.Instance.1", "Mean.corpuscular.haemoglobin.Instance.2", 
                      "Mean.corpuscular.haemoglobin.concentration.Instance.0", "Mean.corpuscular.haemoglobin.concentration.Instance.1", "Mean.corpuscular.haemoglobin.concentration.Instance.2", 
                      "Mean.corpuscular.volume.Instance.0", "Mean.corpuscular.volume.Instance.1", "Mean.corpuscular.volume.Instance.2", 
                      "Mean.platelet.volume.Instance.0", "Mean.platelet.volume.Instance.1", "Mean.platelet.volume.Instance.2", 
                      "Mean.reticulocyte.volume.Instance.0", "Mean.reticulocyte.volume.Instance.1", "Mean.reticulocyte.volume.Instance.2", 
                      "Mean.sphered.cell.volume.Instance.0", "Mean.sphered.cell.volume.Instance.1", "Mean.sphered.cell.volume.Instance.2")

# Blood3的列名
colnames(blood3) <- c("Participant.ID", 
                      "Monocyte.count.Instance.0", "Monocyte.count.Instance.1", "Monocyte.count.Instance.2", 
                      "Monocyte.percentage.Instance.0", "Monocyte.percentage.Instance.1", "Monocyte.percentage.Instance.2", 
                      "Neutrophil.count.Instance.0", "Neutrophil.count.Instance.1", "Neutrophil.count.Instance.2", 
                      "Neutrophil.percentage.Instance.0", "Neutrophil.percentage.Instance.1", "Neutrophil.percentage.Instance.2", 
                      "Nucleated.red.blood.cell.count.Instance.0", "Nucleated.red.blood.cell.count.Instance.1", "Nucleated.red.blood.cell.count.Instance.2", 
                      "Nucleated.red.blood.cell.percentage.Instance.0", "Nucleated.red.blood.cell.percentage.Instance.1", "Nucleated.red.blood.cell.percentage.Instance.2", 
                      "Platelet.count.Instance.0", "Platelet.count.Instance.1", "Platelet.count.Instance.2", 
                      "Platelet.crit.Instance.0", "Platelet.crit.Instance.1", "Platelet.crit.Instance.2")

# Blood4的列名
colnames(blood4) <- c("Participant.ID", 
                      "Platelet.distribution.width.Instance.0", "Platelet.distribution.width.Instance.1", "Platelet.distribution.width.Instance.2", 
                      "Red.blood.cell.count.Instance.0", "Red.blood.cell.count.Instance.1", "Red.blood.cell.count.Instance.2", 
                      "Red.blood.cell.distribution.width.Instance.0", "Red.blood.cell.distribution.width.Instance.1", "Red.blood.cell.distribution.width.Instance.2", 
                      "Reticulocyte.count.Instance.0", "Reticulocyte.count.Instance.1", "Reticulocyte.count.Instance.2", 
                      "Reticulocyte.percentage.Instance.0", "Reticulocyte.percentage.Instance.1", "Reticulocyte.percentage.Instance.2", 
                      "White.blood.cell.count.Instance.0", "White.blood.cell.count.Instance.1", "White.blood.cell.count.Instance.2")


# 查看更新后的列名
colnames(blood1)
colnames(blood2)
colnames(blood3)
colnames(blood4)

# 合并数据框
combined_blood <- blood1 %>%
  left_join(blood2, by = "Participant.ID") %>%
  left_join(blood3, by = "Participant.ID") %>%
  left_join(blood4, by = "Participant.ID") %>% 
  rename(eid = Participant.ID)

# 查看合并后的数据框
colnames(combined_blood)

# 计算 SII, SIRI, NLR, MIR, PLR，并确保分母不为 0，缺失值使用列均值填补
combined_blood <- combined_blood %>%
  mutate(
    # 计算实例 0 的 SII
    SII_0 = ifelse(Lymphocyte.count.Instance.0 != 0, 
                   (Platelet.count.Instance.0 * Neutrophil.count.Instance.0) / Lymphocyte.count.Instance.0, 
                   NA),
    
    # 计算实例 1 的 SII
    SII_1 = ifelse(Lymphocyte.count.Instance.1 != 0, 
                   (Platelet.count.Instance.1 * Neutrophil.count.Instance.1) / Lymphocyte.count.Instance.1, 
                   NA),
    
    # 计算实例 0 的 SIRI
    SIRI_0 = ifelse(Lymphocyte.count.Instance.0 != 0, 
                    (Neutrophil.count.Instance.0 * Monocyte.count.Instance.0) / Lymphocyte.count.Instance.0, 
                    NA),
    
    # 计算实例 1 的 SIRI
    SIRI_1 = ifelse(Lymphocyte.count.Instance.1 != 0, 
                    (Neutrophil.count.Instance.1 * Monocyte.count.Instance.1) / Lymphocyte.count.Instance.1, 
                    NA),
    
    # 计算实例 0 的 NLR
    NLR_0 = ifelse(Lymphocyte.count.Instance.0 != 0, 
                   Neutrophil.count.Instance.0 / Lymphocyte.count.Instance.0, 
                   NA),
    
    # 计算实例 1 的 NLR
    NLR_1 = ifelse(Lymphocyte.count.Instance.1 != 0, 
                   Neutrophil.count.Instance.1 / Lymphocyte.count.Instance.1, 
                   NA),
    
    # 计算实例 0 的 MIR
    MIR_0 = ifelse(Immature.reticulocyte.fraction.Instance.0 != 0, 
                   Monocyte.count.Instance.0 / Immature.reticulocyte.fraction.Instance.0, 
                   NA),
    
    # 计算实例 1 的 MIR
    MIR_1 = ifelse(Immature.reticulocyte.fraction.Instance.1 != 0, 
                   Monocyte.count.Instance.1 / Immature.reticulocyte.fraction.Instance.1, 
                   NA),
    
    # 计算实例 0 的 PLR
    PLR_0 = ifelse(Lymphocyte.count.Instance.0 != 0, 
                   Platelet.count.Instance.0 / Lymphocyte.count.Instance.0, 
                   NA),
    
    # 计算实例 1 的 PLR
    PLR_1 = ifelse(Lymphocyte.count.Instance.1 != 0, 
                   Platelet.count.Instance.1 / Lymphocyte.count.Instance.1, 
                   NA)
  ) %>%
  # 对于每列进行缺失值插补，使用该列的平均数进行填充
  mutate(
    SII_0 = ifelse(is.na(SII_0), mean(SII_0, na.rm = TRUE), SII_0),
    SII_1 = ifelse(is.na(SII_1), mean(SII_1, na.rm = TRUE), SII_1),
    SIRI_0 = ifelse(is.na(SIRI_0), mean(SIRI_0, na.rm = TRUE), SIRI_0),
    SIRI_1 = ifelse(is.na(SIRI_1), mean(SIRI_1, na.rm = TRUE), SIRI_1),
    NLR_0 = ifelse(is.na(NLR_0), mean(NLR_0, na.rm = TRUE), NLR_0),
    NLR_1 = ifelse(is.na(NLR_1), mean(NLR_1, na.rm = TRUE), NLR_1),
    MIR_0 = ifelse(is.na(MIR_0), mean(MIR_0, na.rm = TRUE), MIR_0),
    MIR_1 = ifelse(is.na(MIR_1), mean(MIR_1, na.rm = TRUE), MIR_1),
    PLR_0 = ifelse(is.na(PLR_0), mean(PLR_0, na.rm = TRUE), PLR_0),
    PLR_1 = ifelse(is.na(PLR_1), mean(PLR_1, na.rm = TRUE), PLR_1)
  )

summary(combined_blood$SII_0)
summary(combined_blood$SIRI_0)
summary(combined_blood$NLR_0)
summary(combined_blood$MIR_0)
summary(combined_blood$PLR_0)

blood_inflam<- combined_blood %>%
  select(eid, SII_0, SII_1, SIRI_0, SIRI_1, NLR_0, NLR_1, MIR_0, MIR_1, PLR_0, PLR_1)

df23 <- left_join(df22,education_state,by = "eid")

df2 <- left_join(df23,blood_inflam,by = "eid")

df2 <- df2 %>%
  mutate(education_new = case_when(
    education == 0 ~ "Other levels",
    education == 1 ~ "College or University degree",
    TRUE ~ NA_character_  # 处理可能的缺失值
  ))

table(df2$Alcohol.drinker.status...Instance.0)

table(df2$education_category)
table(df2$education_new)

colnames(df2) %>% dput()

# 检查日期列的类型
class(df2$Earliest_Diagnosis_Date) # 确保这也是 "Date" 类型
class(df2$instance0_date)          # 已经是 "Date" 类型

# 新增标记列，仅标记基线之前已诊断其他肝病的个体
df2 <- df2 %>%
  mutate(
    Other_Liver_Disease_Before_Baseline = if_else(
      !is.na(Earliest_Diagnosis_Date) & # 确保有诊断日期
        Earliest_Diagnosis_Date < instance0_date, # 诊断时间在基线之前
      1, # 标记为1
      0  # 否则标记为0
    )
  )


table(df2$Other_Liver_Disease_Before_Baseline)


#筛选NAFLD和NASH
df222 <- df111 %>% 
  mutate(
    NAFLD = if_else(str_detect(Disease_Description, "^K76\\.0"), 1, 0,),
    NASH = if_else(str_detect(Disease_Description, "^K75\\.8"), 1, 0,),
  )

dfnafld <- df222 %>% 
  filter(NAFLD==1|NASH==1)

death1 <- read_csv("D:\\UKBdata\\UKBB50万人的所有数据\\death register\\data_death.csv")
death2 <- read_csv("D:\\UKBdata\\UKBB50万人的所有数据\\death register\\data_death_cause.csv")
death3 <- read_csv("D:\\UKBdata\\UKBB50万人的所有数据\\death register\\data_participant (1).csv")

# 筛选 Primary cause of death
death2_primary <- subset(death2, level == "Primary cause of death")

# 使用 grepl() 函数筛选 cause_icd10 列以 "K76.0" 开头的行
death2_primary_nafld <- subset(death2_primary, grepl("^K76\\.0", cause_icd10)| grepl("^K75\\.8", cause_icd10))


# 只保留第2, 5, 6列，并创建一个新的数据框
death_NAFLD <- death2_primary_nafld[, c("eid", "level", "cause_icd10")]

# 添加一列识别死因为 NAFLD 的标识列
death_NAFLD$NAFLD_death <- ifelse(grepl("^K76\\.0", death_NAFLD$cause_icd10)| grepl("^K75\\.8", death_NAFLD$cause_icd10), 1, 0)
#45人

# 使用 merge() 函数进行左合并，保持 df2 中的所有行
df2_merged <- merge(df2, death_NAFLD[, c("eid", "NAFLD_death")], by = "eid", all.x = TRUE)
df2_merged <- merge(df2_merged, dfnafld, by = "eid", all.x = TRUE)

df2_merged <- df2_merged %>%
  rename(NAFLD_diag_date = Disease_Date)

# 将日期列转换为 Date 类型
df2_merged$NAFLD_diag_date <- as.Date(df2_merged$NAFLD_diag_date)
df2_merged$instance0_date <- as.Date(df2_merged$instance0_date)

# 新增标记列，根据诊断日期相对于基线日期的不同情况进行标记
df2_merged <- df2_merged %>%
  mutate(
    NAFLD_Diagnosis_Timing = case_when(
      !is.na(NAFLD_diag_date) & NAFLD_diag_date < instance0_date + years(2) ~ 1, # 基线前和基线2年内患病标记为1
      !is.na(NAFLD_diag_date) & NAFLD_diag_date <= instance1_date ~ 2, 
      TRUE ~ 0 # 其他情况标记为0
    )
  )

table(df2_merged$NAFLD_Diagnosis_Timing)

# 统计instance1_date列的非缺失值行数
non_na_count <- sum(!is.na(df2_merged$instance1_date))
print(non_na_count)

save(df2_merged,file = "D:\\UKBdata\\dputdata\\df2_merged.rdata")


load(file = "D:\\UKBdata\\dputdata\\df2_merged.rdata")

df5 <- df2_merged


summary(df5$Follow_Up_Time_Years)
summary(df5$death_date)
table(df5$Event_Status)

muscle <- read_csv("D:\\UKBdata\\UKBB50万人的所有数据\\muscle1.csv")

BIA <- read_csv("D:\\UKBdata\\UKBB50万人的所有数据\\gzh_bia.csv")

muscle <-  muscle %>% 
  rename(eid='Participant ID')

BIA <-  BIA %>% 
  rename(eid='Participant ID')
# 替换 muscle 数据框中的列名
colnames(BIA) <- gsub(" | ", "_", colnames(BIA))  # 替换空格为下划线
colnames(BIA) <- gsub("\\|", "", colnames(BIA))  # 移除竖线符号

colnames(muscle) %>% dput()

# 替换 muscle 数据框中的列名
colnames(muscle) <- gsub(" | ", "_", colnames(muscle))  # 替换空格为下划线
colnames(muscle) <- gsub("\\|", "", colnames(muscle))  # 移除竖线符号

# 查看修改后的列名
colnames(muscle)

fat <- read_csv("D:\\UKBdata\\UKBB50万人的所有数据\\gzh_fat.csv")

fat <-  fat %>% 
  rename(eid='Participant ID')
colnames(fat) %>% dput()
# 原始列名
colnames(fat) <- c("eid", "Arm_fat_mass_left_Instance_0", "Arm_fat_mass_left_Instance_1", 
                   "Arm_fat_mass_left_Instance_2", "Arm_fat_mass_left_Instance_3", 
                   "Arm_fat_mass_right_Instance_0", "Arm_fat_mass_right_Instance_1", 
                   "Arm_fat_mass_right_Instance_2", "Arm_fat_mass_right_Instance_3", 
                   "Basal_metabolic_rate_Instance_0", "Basal_metabolic_rate_Instance_1", 
                   "Basal_metabolic_rate_Instance_2", "Basal_metabolic_rate_Instance_3", 
                   "Body_fat_percentage_Instance_0", "Body_fat_percentage_Instance_1", 
                   "Body_fat_percentage_Instance_2", "Body_fat_percentage_Instance_3", 
                   "Whole_body_fat_mass_Instance_0", "Whole_body_fat_mass_Instance_1", 
                   "Whole_body_fat_mass_Instance_2", "Whole_body_fat_mass_Instance_3")


colnames(fat) %>% dput()

# 找到 muscle 和 fat 中的列
muscle_columns <- colnames(muscle)
fat_columns <- colnames(fat)

# 筛选出 muscle 中 fat 没有的列（排除 eid 列）
muscle_columns_to_add <- muscle_columns[!muscle_columns %in% c(fat_columns, "eid")]

# 从 muscle 中选择需要的列（eid 和其他不重复的列）
muscle_to_add <- muscle %>%
  select(eid, all_of(muscle_columns_to_add))

# 根据 eid 将 muscle 中筛选的列合并到 fat 中
df_final <- left_join(fat, muscle_to_add, by = "eid")

df_final1 <- left_join(df_final, BIA, by = "eid")


# 最后将合并后的 df_combined 合并到 df5_del_basenafld_1 中
df6 <- left_join(df5, df_final1, by = "eid")

#定义高血压
BP <- read_csv("D:\\UKBdata\\UKBB50万人的所有数据\\GZH_BP_New.csv")
# 替换 muscle 数据框中的列名
colnames(BP) <- gsub(" | ", "_", colnames(BP))  # 替换空格为下划线
colnames(BP) <- gsub("\\|", "", colnames(BP))  # 移除竖线符号
colnames(BP) <- gsub("\\,", "", colnames(BP))  # 移除逗号符号
colnames(BP) %>% dput

BP <-  BP %>% 
  rename(eid='Participant_ID')
colnames(BP) %>% dput()
summary(BP$Diastolic_blood_pressure_automated_reading__Instance_0__Array_0)
summary(BP$Diastolic_blood_pressure_automated_reading__Instance_0__Array_1)
summary(BP$Diastolic_blood_pressure_manual_reading__Instance_0__Array_0)

instances <- 0:2

# 定义一个函数来计算平均值
compute_average <- function(df, measure, instance) {
  array0_col <- paste0(measure, "__Instance_", instance, "__Array_0")
  array1_col <- paste0(measure, "__Instance_", instance, "__Array_1")
  
  # 计算平均值，如果只有一个测量值，则使用该值
  average <- rowMeans(df[, c(array0_col, array1_col)], na.rm = TRUE)
  
  # 如果两个值都是 NA，则结果为 NA
  average[is.nan(average)] <- NA
  
  return(average)
}

# 计算每个实例的平均舒张压和收缩压
for (i in instances) {
  # 计算平均舒张压
  diastolic_avg_col <- paste0("Diastolic_avg_Instance_", i)
  BP[[diastolic_avg_col]] <- compute_average(BP, "Diastolic_blood_pressure_automated_reading", i)
  
  # 计算平均收缩压
  systolic_avg_col <- paste0("Systolic_avg_Instance_", i)
  BP[[systolic_avg_col]] <- compute_average(BP, "Systolic_blood_pressure_automated_reading", i)
}

# 定义一个函数来判断高血压
compute_hypertension <- function(systolic, diastolic) {
  # 判断条件：收缩压 >= 140 或 舒张压 >= 90
  hypertension <- ifelse((!is.na(systolic) & systolic >= 140) | 
                           (!is.na(diastolic) & diastolic >= 90), 1, 0)
  # 将 NA 替换为 0
  hypertension[is.na(hypertension)] <- 0
  return(hypertension)
}

# 计算每个实例的高血压标记
for (i in instances) {
  # 获取平均收缩压和舒张压列名
  diastolic_avg_col <- paste0("Diastolic_avg_Instance_", i)
  systolic_avg_col <- paste0("Systolic_avg_Instance_", i)
  
  # 创建高血压_测量列
  hypertension_col <- paste0("HBP_measure_Instance_", i)
  BP[[hypertension_col]] <- compute_hypertension(BP[[systolic_avg_col]], BP[[diastolic_avg_col]])
}

table(BP$HBP_measure_Instance_0)
table(BP$HBP_measure_Instance_1)
table(BP$HBP_measure_Instance_2)


#列入诊断的HBP患者
# 定义高血压相关的 ICD 编码
hypertension_codes <- c(
  "I10",
  "I11", "I11.0", "I11.9",
  "I12", "I12.0", "I12.9",
  "I13", "I13.0", "I13.1", "I13.2", "I13.9",
  "I15", "I15.0", "I15.1", "I15.2", "I15.8", "I15.9"
)

# 转义点并创建正则表达式模式
escaped_codes <- gsub("\\.", "\\\\.", hypertension_codes)
pattern <- paste0("\\b(", paste(escaped_codes, collapse = "|"), ")\\b")

# 添加 HBP_diagnose 列
df111 <- df111 %>%
  mutate(
    HBP_diagnose = if_else(
      str_detect(Disease_Description, pattern),
      1,
      0
    )
  )
table(df111$HBP_diagnose)

# 为每个 eid 保留一条记录，优先保留 HBP_diagnose = 1 的记录
df_BP <- df111 %>%
  arrange(eid, desc(HBP_diagnose)) %>%  # 按 eid 排序，优先 HBP_diagnose = 1
  group_by(eid) %>%
  slice(1) %>%  # 保留每组的第一条记录
  ungroup()
table(df_BP$HBP_diagnose)

df_ins0 <- df6 %>% 
  select("eid","instance0_date", "instance1_date", "instance2_date")

BP1 <- left_join(df_BP, df_ins0, by = "eid")
BP2 <- BP1 %>%
  mutate(
    HBP_diagnose0 = ifelse(
      HBP_diagnose == 1 & (instance0_date <= Disease_Date | is.na(instance0_date)),1,0))
table(BP2$HBP_diagnose0)

BP3 <- left_join(BP, BP2, by = "eid") 


BP4 <- BP3 %>% 
  mutate(HBP_all=ifelse(HBP_measure_Instance_0==0&HBP_diagnose0==0,0,1))

table(BP4$HBP_all)

BP_final <- BP4 %>% 
  select("eid","HBP_all")

BP_final <- BP_final %>%
  arrange(eid, desc(HBP_all)) %>%  # 将 HBP_all 为 1 的行排在前面
  distinct(eid, .keep_all = TRUE) 

# 检查是否存在任何重复的 eid
any_duplicates <- any(duplicated(BP_final$eid))
print(any_duplicates)  # 如果有重复，返回 TRUE，否则返回 FALSE

# 计算重复的 eid 数量
num_duplicates <- sum(duplicated(BP_final$eid))
print(num_duplicates)  # 返回重复 eid 的总数

# 最后将合并后的 df_combined 合并到 df5_del_basenafld_1 中
df6 <- left_join(df6, BP_final, by = "eid")
table(df6$HBP_all,useNA = "ifany")

df6 <- df6 %>%
  mutate(HBP_all = replace_na(HBP_all, 0)) 

#定义腹部肥胖
#colnames(df6) %>% dput()
summary(df6$Waist_circumference__Instance_0)
df6 <- df6 %>%
  mutate(central_obesity0 = case_when(
    sex == 0 & Waist_circumference__Instance_0 >= 80 ~ 1,
    sex == 1 & Waist_circumference__Instance_0 >= 90 ~ 1,
    TRUE ~ 0  # 其他情况，包括缺失值，设为0
  ))
table(df6$central_obesity0 ,useNA = "ifany")

#低HDL
summary(df6$`HDL@307600.0`)
df6 <- df6 %>%
  mutate(low_HDL0 = case_when(
    sex == 0 & `HDL@307600.0` < 1.29 ~ 1,
    sex == 1 &`HDL@307600.0` < 1.03 ~ 1,
    TRUE ~ 0  # 其他情况，包括缺失值，设为0
  ))
table(df6$low_HDL0)

#高TG
summary(df6$`Triglycerides@308700.0`)
df6 <- df6 %>% 
  mutate(
    high_TG0 = case_when(
      is.na(`Triglycerides@308700.0`) ~ 0,                   # 缺失值时设为0
      `Triglycerides@308700.0` >= 1.7 ~ 1,                   # 当 Triglycerides >= 1.7时设为1
      TRUE ~ 0                                               # 其他情况设为0
    )
  )
table(df6$high_TG0)

#T2DM
summary(df6$`Glucose@307400.0`)
df6 <- df6 %>% 
  mutate(
    high_glu0 = case_when(
      is.na(`Glucose@307400.0`) ~ 0,                   # 缺失值时设为0
      `Glucose@307400.0` >= 5.6 ~ 1,                   # 当 Triglycerides >= 1.7时设为1
      TRUE ~ 0                                               # 其他情况设为0
    )
  )
df6 <- df6 %>%
  mutate(T2DM0_all = ifelse(high_glu0 == 1 | T2DM0 == 1, 1, 0)) %>%
  replace_na(list(T2DM0_all = 0))
table(df6$T2DM0_all)

df6 <- df6 %>%
  mutate(MET_group=case_when(
    MET插补0 < 600 ~ 1,         # 小于 600 的归为 1
    MET插补0 >= 600 & MET插补0 < 3000 ~ 2,  # 600 到 3000 之间的归为 2
    MET插补0 >= 3000 ~ 3        # 大于等于 3000 的归为 3
  )) %>%
  # 将 MET_group 转换为因子变量，并设置标签
  mutate(MET_group = factor(MET_group, 
                            levels = c(1, 2, 3), 
                            labels = c("Low physical activity", "Moderate physical activity", 
                                       "High physical activity")))



#save(df6,file = "D:\\UKBdata\\dputdata\\df6.Rdata")

library(lubridate)

# 确保日期列是 Date 类型
df6$NAFLD_diag_date <- as.Date(df6$NAFLD_diag_date)
df6$instance0_date <- as.Date(df6$instance0_date)

# 定义最后一个已知的随访截止日期
last_known_date <- as.Date("2022-12-19")

summary(df6$NAFLD_diag_date)
summary(df6$death_date)
# 生成新列：事件状态及随访时间
df7 <- df6 %>%
  mutate(
    Event_Status = case_when(
      # 新增条件：NAFLD诊断日期早于基线时间时标记为999
      # 新增条件：NAFLD诊断日期早于第一次随访时间时标记为666
      !is.na(NAFLD_diag_date) & (NAFLD_diag_date < instance0_date) ~ 999, 
      !is.na(NAFLD_diag_date) & (NAFLD_diag_date < instance1_date) ~ 666,
      # 原有条件保持不变
      !is.na(NAFLD_diag_date) ~ 1,      # NAFLD诊断
      !is.na(death_date) ~ 2,           # 其他原因死亡
      TRUE ~ 0                          # 右删失
    )
  )

table(df7$Event_Status,useNA = "ifany")    

df8 <- df7 %>%
  mutate(
    # 计算instance0基准随访时间
    NAFLD_FollowUp_Days0 = ifelse(Event_Status == 1,
                                  as.numeric(difftime(NAFLD_diag_date, instance0_date, units = "days")),
                                  NA
    ),
    NAFLD_FollowUp_Years0 = ifelse(Event_Status == 1,
                                   pmax(NAFLD_FollowUp_Days0 / 365.25, 0),  # 处理早于基线的情况为0
                                   NA
    ),
    
    # 计算instance1基准随访时间
    NAFLD_FollowUp_Days1 = ifelse(Event_Status == 1,
                                  as.numeric(difftime(NAFLD_diag_date, instance1_date, units = "days")),
                                  NA
    ),
    NAFLD_FollowUp_Years1 = ifelse(Event_Status == 1,
                                   pmax(NAFLD_FollowUp_Days1 / 365.25, 0),
                                   NA
    )
  )

summary(df8$NAFLD_FollowUp_Years0)
summary(df8$NAFLD_FollowUp_Years1)
table(df8$Event_Status)


df9 <- df8 %>%
  mutate(
    FollowUp_Years_NAFLD0 = case_when(
      # Event_Status=1：计算NAFLD诊断日期到基线的时间差
      Event_Status == 1 ~ 
        pmax(
          as.numeric(
            difftime(NAFLD_diag_date, instance0_date, units = "days")
          ) / 365.25,
          0  # 处理诊断早于基线的情况[3,5](@ref)
        ),
      
      # Event_Status=999：直接返回NA
      Event_Status == 999 ~ NA_real_,
      
      # Event_Status=2：验证死亡日期与诊断日期的关系
      Event_Status == 2 & !is.na(death_date) & 
        (coalesce(death_date,last_known_date) <= coalesce(NAFLD_diag_date,last_known_date)) ~
        pmax(
          as.numeric(
            difftime(death_date, instance0_date, units = "days")
          ) / 365.25,
          0
        ),
      
      # Event_Status=0：使用最后已知日期计算
      Event_Status == 0 ~ 
        pmax(
          as.numeric(
            difftime(last_known_date, instance0_date, units = "days")
          ) / 365.25,
          0
        ),
      
      # 其他异常情况返回NA
      TRUE ~ NA_real_
    )
  )


df9 <- df9 %>%
  mutate(
    FollowUp_Years_NAFLD1 = case_when(
      # Event_Status=1：计算NAFLD诊断日期到基线的时间差
      Event_Status == 1 ~ 
        pmax(
          as.numeric(
            difftime(NAFLD_diag_date, instance1_date, units = "days")
          ) / 365.25,
          0  # 处理诊断早于基线的情况[3,5](@ref)
        ),
      
      # Event_Status=999：直接返回NA
      Event_Status == 999 ~ NA_real_,
      # Event_Status=666：直接返回NA
      Event_Status == 666 ~ NA_real_,
      
      # Event_Status=2：验证死亡日期与诊断日期的关系
      Event_Status == 2 & !is.na(death_date) & 
        (coalesce(death_date,last_known_date) <= coalesce(NAFLD_diag_date,last_known_date)) ~
        pmax(
          as.numeric(
            difftime(death_date, instance1_date, units = "days")
          ) / 365.25,
          0
        ),
      
      # Event_Status=0：使用最后已知日期计算
      Event_Status == 0 ~ 
        pmax(
          as.numeric(
            difftime(last_known_date, instance1_date, units = "days")
          ) / 365.25,
          0
        ),
      
      # 其他异常情况返回NA
      TRUE ~ NA_real_
    )
  )

summary(df9$FollowUp_Years_NAFLD0)
summary(df9$FollowUp_Years_NAFLD1)

df10 <- df9 %>%
  mutate(
    # 基线时 ALST 计算 (Instance_0)
    ALST_Instance_0 = case_when(
      !is.na(Body_fat_percentage_Instance_0) & !is.na(Weight__Instance_0) ~ 
        (0.958 * (Weight__Instance_0 * (1 - Body_fat_percentage_Instance_0 / 100))) - (0.166 * sex) - 0.308,
      TRUE ~ NA_real_),
    
    # 基线时 ALST/身高² 计算 (Instance_0)
    ALST_per_height2_0 = case_when(
      !is.na(Body_fat_percentage_Instance_0) & !is.na(Weight__Instance_0) & !is.na(Standing_height__Instance_0) ~ 
        ((0.958 * (Weight__Instance_0 * (1 - Body_fat_percentage_Instance_0 / 100))) - (0.166 * sex) - 0.308) / ((Standing_height__Instance_0 / 100)^2),
      TRUE ~ NA_real_),
    
    # 第一次随访时 ALST 计算 (Instance_1)
    ALST_Instance_1 = case_when(
      !is.na(Body_fat_percentage_Instance_1) & !is.na(Weight__Instance_1) ~ 
        (0.958 * (Weight__Instance_1 * (1 - Body_fat_percentage_Instance_1 / 100))) - (0.166 * sex) - 0.308,
      TRUE ~ NA_real_),
    
    # 第一次随访时 ALST/身高² 计算 (Instance_1)
    ALST_per_height2_1 = case_when(
      !is.na(Body_fat_percentage_Instance_1) & !is.na(Weight__Instance_1) & !is.na(Standing_height__Instance_1) ~ 
        ((0.958 * (Weight__Instance_1 * (1 - Body_fat_percentage_Instance_1 / 100))) - (0.166 * sex) - 0.308) / ((Standing_height__Instance_1 / 100)^2),
      TRUE ~ NA_real_),
    # 计算基线时 ALST/BMI
    ALST_BMI_0 = case_when(
      !is.na(ALST_Instance_0) & !is.na(BMI0) ~ ALST_Instance_0 / BMI0,
      TRUE ~ NA_real_
    ),
    
    # 计算第一次随访时 ALST/BMI
    ALST_BMI_1 = case_when(
      !is.na(ALST_Instance_1) & !is.na(BMI1) ~ ALST_Instance_1 / BMI1,
      TRUE ~ NA_real_
    ), 
    # 计算基线ASM0（Instance_0）
    ASM0 = ( (Standing_height__Instance_0^2 / Impedance_of_whole_body__Instance_0) * 0.401 
             + sex * 3.825 
             + age0 * (-0.071) 
             + 5.102
    ),
    ASM_BMI_0 = case_when(
      !is.na(ASM0) & !is.na(BMI0) ~ ASM0 / BMI0,
      TRUE ~ NA_real_
    ),
    # 计算随访时间点ASM1（Instance_1）
    ASM1 = ( (Standing_height__Instance_1^2 / Impedance_of_whole_body__Instance_1) * 0.401 
             + sex * 3.825 
             + age1 * (-0.071) 
             + 5.102
    ),
    ASM_BMI_1 = case_when(
      !is.na(ASM1) & !is.na(BMI1) ~ ASM1 / BMI1,
      TRUE ~ NA_real_
    ),
    
  )

summary(df10$ALST_Instance_0)
summary(df10$ALST_Instance_1)
summary(df10$ALST_per_height2_0)
summary(df10$ALST_per_height2_1)
summary(df10$ASM0)
summary(df10$ASM1)
summary(df10$ASM_BMI_0)
summary(df10$ALST_BMI_1)

save(df10,file = "D:\\UKBdata\\dputdata\\df10.Rdata")

load(file = "D:\\UKBdata\\dputdata\\df10.Rdata")

#删除缺少BIA数据
summary(df10$ASM_BMI_0)
summary(df10$ASM_BMI_1)

df11 <- df10 %>% 
  filter(!is.na(instance1_date))
nrow(df10)-nrow(df11)

df12 <- df11 %>% 
  filter(!is.na(ASM_BMI_0)) %>% 
  filter(!is.na(ASM_BMI_1))
nrow(df11)-nrow(df12)
#[1] 590

table(df12$Event_Status)

df13 <- df12 %>% 
  filter((Event_Status!=999))

nrow(df12)-nrow(df13)
#[1] 15 

# 缺失值很少使用中位数进行插补
df14 <- df13 %>%
  mutate(
    ALST_per_height2_0 = ifelse(is.na(ALST_per_height2_0), 
                                median(ALST_per_height2_0, na.rm = TRUE), 
                                ALST_per_height2_0),
    ALST_per_height2_1 = ifelse(is.na(ALST_per_height2_1), 
                                median(ALST_per_height2_1, na.rm = TRUE), 
                                ALST_per_height2_1),
    ALST_BMI_0 = ifelse(is.na(ALST_BMI_0), 
                        median(ALST_BMI_0, na.rm = TRUE), 
                        ALST_BMI_0),
    ALST_BMI_1 = ifelse(is.na(ALST_BMI_1), 
                        median(ALST_BMI_1, na.rm = TRUE), 
                        ALST_BMI_1),
    grip_strength0 = ifelse(is.na(grip_strength0), 
                            median(grip_strength0, na.rm = TRUE), 
                            grip_strength0),
    grip_strength1 = ifelse(is.na(grip_strength1), 
                            median(grip_strength1, na.rm = TRUE), 
                            grip_strength1),
    BMI0 = ifelse(is.na(BMI0), 
                  median(BMI0, na.rm = TRUE), 
                  BMI0),
    BMI1 = ifelse(is.na(BMI1), 
                  median(BMI1, na.rm = TRUE), 
                  BMI1),
    ASM0 = ifelse(is.na(ASM0), 
                  median(ASM0, na.rm = TRUE), 
                  ASM0),
    ASM_BMI_0 = ifelse(is.na(ASM_BMI_0), 
                       median(ASM_BMI_0, na.rm = TRUE), 
                       ASM_BMI_0),
    ASM1 = ifelse(is.na(ASM1), 
                  median(ASM1, na.rm = TRUE), 
                  ASM1),
    ASM_BMI_1 = ifelse(is.na(ASM_BMI_1), 
                       median(ASM_BMI_1, na.rm = TRUE), 
                       ASM_BMI_1),
    smokingstatus0 = ifelse(is.na(smokingstatus0), 
                  median(smokingstatus0, na.rm = TRUE), 
                  smokingstatus0),
    smokingstatus1 = ifelse(is.na(smokingstatus1), 
                       median(smokingstatus1, na.rm = TRUE), 
                       smokingstatus1),
    race = ifelse(is.na(race), 
                  median(race, na.rm = TRUE), 
                  race),
    education_new = ifelse(is.na(education_new), 
                       median(education_new, na.rm = TRUE), 
                       education_new),
    Townsend = ifelse(is.na(Townsend), 
                           median(Townsend, na.rm = TRUE), 
                      Townsend)
  )

table(df14$sex,useNA = "ifany")
table(df14$smokingstatus0,useNA = "ifany")
table(df14$smokingstatus1,useNA = "ifany")
table(df14$alcohol_new0,useNA = "ifany")
table(df14$alcohol_new1,useNA = "ifany")
table(df14$central_obesity0,useNA = "ifany")
table(df14$high_TG0,useNA = "ifany")
table(df14$high_glu0,useNA = "ifany")
table(df14$low_HDL0,useNA = "ifany")
table(df14$HBP_all,useNA = "ifany")
table(df14$race,useNA = "ifany")
table(df14$education_new,useNA = "ifany")
table(df14$MET_group,useNA = "ifany")

summary(df14$age0)
summary(df14$BMI0)
summary(df14$BMI1)
summary(df14$Townsend)


df15 <- df14 %>% 
  filter(Event_Status!=666)
save(df15,file = "D:\\UKBdata\\dputdata\\df15.Rdata")

#开始
load("D:\\UKBdata\\dputdata\\df15.Rdata")




final_nafld1 <- final_nafld1 %>%
  mutate(
    muscle_change2 = ifelse(ALST_per_height2_1 < ALST_per_height2_0 - 0.98^5, 1, 0)
  )

table(final_nafld1$muscle_change2)
table(final_nafld1$Event_Status)
summary(final_nafld1$Follow_Up_Time_Years)

summary(final_nafld1$grip_strength0)
summary(final_nafld1$grip_strength1)

library(tidyverse)
library(haven)
library(mice)
library(ggplot2)
library(lubridate)
library(cmprsk)
bmtcrr <- df10
# 转换Event_Status为因变量，其中 0 为无事件，1 为NAFLD结局，2 为死亡
bmtcrr$Event_Status <- factor(bmtcrr$Event_Status, levels = c(0, 1, 2))

bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$Event_Status<- factor(bmtcrr$Event_Status) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change<- factor(bmtcrr$muscle_change2)
#bmtcrr$Source <- factor(bmtcrr$Source) #将Source变量因子化其次，要把所有自变量放在一个数据框里。
covs <- subset(bmtcrr, select = c(muscle_change2,race, sex,age0)) #对数据进行筛选，只保留ftime和Status之外的变量
#covs[, c(1:3, 5)] <- lapply(covs[, c(1:3, 5)], as.integer) #将第1-3和第5列变量强制转为整数型
str(covs) #检查数据框中的变量
#指定failcode=1, cencode=0, 分别代表结局事件1与截尾0，其他默认为竞争风险事件2。
f2 <- crr(bmtcrr$Follow_Up_Time_Years, bmtcrr$Event_Status, covs, failcode = 1, cencode = 0) #建立多因素分析模型
summary(f2) #展示结果


library(riskRegression)
library(prodlim)
table(bmtcrr$Event_Status)
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~age0+sex+race+muscle_change2,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果


summary(final_nafld1$ASM0)
summary(final_nafld1$ASM_BMI_0)

bmtcrr <- df10
#计算差值四分位
bmtcrr <- bmtcrr %>%
  mutate(
    muscle_change_diff = ASM_BMI_1 - ASM_BMI_0,  # 计算差值
    muscle_change4 = ntile(muscle_change_diff, 4)  # 根据差值进行四分位分组
  )


# 检查分组情况
table(final_nafld11$muscle_change4)
bmtcrr <- final_nafld11
bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$Event_Status<- factor(bmtcrr$Event_Status) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change4<- factor(bmtcrr$muscle_change4)

#未校正
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~muscle_change4,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果

bmtcrr <- df10
#计算差值三分位
bmtcrr <- bmtcrr %>% 
  filter(!is.na(FollowUp_Years_NAFLD1))
bmtcrr <- bmtcrr %>%
  mutate(
    muscle_change_diff = ASM_BMI_1 - ASM_BMI_0,  # 计算差值
    muscle_change3 = ntile(muscle_change_diff, 3)  # 根据差值进行三分位分组
  )
bmtcrr <- bmtcrr %>%
  mutate(event2=ifelse(Event_Status==1,1,0))

# 检查分组情况
table(bmtcrr$muscle_change3)
bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$Event_Status<- factor(bmtcrr$Event_Status) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change3<- factor(bmtcrr$muscle_change3)
summary(bmtcrr$FollowUp_Years_NAFLD1)


#未校正
m1<-FGR(Hist(FollowUp_Years_NAFLD1,event2)~muscle_change3+age0+sex,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果



#计算差值四分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    muscle_change_diff = ALST_BMI_1 - ALST_BMI_0,  # 计算差值
    muscle_change4 = ntile(muscle_change_diff, 4)  # 根据差值进行四分位分组
  )


# 检查分组情况
table(final_nafld11$muscle_change4)
bmtcrr <- final_nafld11
bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$Event_Status<- factor(bmtcrr$Event_Status) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change4<- factor(bmtcrr$muscle_change4)

#未校正
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~muscle_change4,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果



#定义新变量累计风险ASM
#计算差值四分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    muscle_change_cumula=(ASM_BMI_1 + ASM_BMI_0)/8.86,
    muscle_change_cumula3 = ntile(muscle_change_cumula, 3)  # 根据差值进行四分位分组
  )


# 检查分组情况
table(final_nafld11$muscle_change_cumula3)
bmtcrr <- final_nafld11
bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$Event_Status<- factor(bmtcrr$Event_Status) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change_cumula3<- factor(bmtcrr$muscle_change_cumula3)

#未校正
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~muscle_change_cumula3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果

test <- bmtcrr %>% 
  filter(Event_Status==1)
table(test$muscle_change_cumula3)


#校正age0+sex+race
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~age0+sex+race+muscle_change_cumula3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果

test <- bmtcrr %>% 
  filter(Event_Status==1)
table(test$muscle_change_cumula3)


#校正age0+sex+race+central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0
table(bmtcrr$central_obesity0)
table(bmtcrr$T2DM0_all)
table(bmtcrr$HBP_all)
table(bmtcrr$low_HDL0)
table(bmtcrr$high_TG0)
table(bmtcrr$education_new)
table(bmtcrr$alcohol_new0)
summary(bmtcrr$MET插补0)
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~age0+sex+race+
          Townsend+education_new+smokingstatus0+alcohol_new0+MET插补0+
          central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0+
          muscle_change_cumula3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果

test <- bmtcrr %>% 
  filter(Event_Status==1)
table(test$muscle_change_cumula3)


#定义新变量累计风险
#计算差值四分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    muscle_change_cumula=(ALST_BMI_1 + ALST_BMI_0)/8.86,
    muscle_change_cumula3 = ntile(muscle_change_cumula, 3)  # 根据差值进行四分位分组
  )


# 检查分组情况
table(final_nafld11$muscle_change_cumula3)
bmtcrr <- final_nafld11
bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$Event_Status<- factor(bmtcrr$Event_Status) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change_cumula3<- factor(bmtcrr$muscle_change_cumula3)

#未校正
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~muscle_change_cumula3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果

test <- bmtcrr %>% 
  filter(Event_Status==1)
table(test$muscle_change_cumula3)


#校正age0+sex+race
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~age0+sex+race+muscle_change_cumula3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果

test <- bmtcrr %>% 
  filter(Event_Status==1)
table(test$muscle_change_cumula3)


#校正age0+sex+race+central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0
table(bmtcrr$central_obesity0)
table(bmtcrr$T2DM0_all)
table(bmtcrr$HBP_all)
table(bmtcrr$low_HDL0)
table(bmtcrr$high_TG0)
table(bmtcrr$education_new)
table(bmtcrr$alcohol_new0)
summary(bmtcrr$MET插补0)
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~age0+sex+race+
          Townsend+education_new+smokingstatus0+alcohol_new0+MET插补0+
          central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0+
          muscle_change_cumula3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果

test <- bmtcrr %>% 
  filter(Event_Status==1)
table(test$muscle_change_cumula3)


colnames(final_nafld11) %>% dput
summary(final_nafld11$smokingstatus0)
summary(final_nafld11$Townsend)
summary(final_nafld11$`MET插补0`)

summary(final_nafld11$`Glucose@307400.0`)
summary(final_nafld11$`HDL@307600.0`)
summary(final_nafld11$`LDL@307800.0`)
summary(final_nafld11$SBP0)
summary(final_nafld11$`LDL@307800.0`)

table(final_nafld11$education)
table(final_nafld11$smokingstatus0)
table(final_nafld11$alcoholstatus0)
table(final_nafld11$diet0)
table(final_nafld11$T2DM0)

table(final_nafld11$T2DM0)
table(final_nafld11$T2DM0)
table(final_nafld11$T2DM0)
table(final_nafld11$T2DM0)

#握力：计算差值四分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    grip_strength_diff = grip_strength1 - grip_strength0,  # 计算差值
    grip_strength4 = ntile(grip_strength_diff, 4)  # 根据差值进行四分位分组
  )

# 检查分组情况
table(final_nafld11$grip_strength4)
bmtcrr <- final_nafld11
bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$Event_Status<- factor(bmtcrr$Event_Status) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$grip_strength4<- factor(bmtcrr$grip_strength4)
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~age0+sex+race+grip_strength4,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果

test <- bmtcrr %>% 
  filter(Event_Status==1)
table(test$grip_strength4)

#握力校正变量
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~age0+sex+race+BMI0+
          Townsend+education+smokingstatus0+alcoholstatus0+diet0+MET插补0+
          central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0+grip_strength4,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果

test <- bmtcrr %>% 
  filter(Event_Status==1)
table(test$grip_strength4)


#握力：计算差值三分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    grip_strength_diff = grip_strength1 - grip_strength0,  # 计算差值
    grip_strength3 = ntile(grip_strength_diff, 3)  # 根据差值进行四分位分组
  )

# 检查分组情况
table(final_nafld11$grip_strength3)
bmtcrr <- final_nafld11
bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$Event_Status<- factor(bmtcrr$Event_Status) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$grip_strength3<- factor(bmtcrr$grip_strength3)

#握力校正变量
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~age0+sex+race+BMI0+
          Townsend+education+smokingstatus0+alcoholstatus0+diet0+MET插补0+
          central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0+grip_strength3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果

test <- bmtcrr %>% 
  filter(Event_Status==1)
table(test$grip_strength3)


#握力/BMI：计算差值四分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    grip_BMI_diff = grip_BMI1 - grip_BMI0,  # 计算差值
    grip_BMI4 = ntile(grip_BMI_diff, 4)  # 根据差值进行四分位分组
  )

# 检查分组情况
table(final_nafld11$grip_BMI4)
bmtcrr <- final_nafld11
bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$Event_Status<- factor(bmtcrr$Event_Status) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$grip_BMI4<- factor(bmtcrr$grip_BMI4)
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~age0+sex+race+grip_BMI4,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果

test <- bmtcrr %>% 
  filter(Event_Status==1)
table(test$grip_BMI4)


#握力校正变量
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~age0+sex+race+
          Townsend+education+smokingstatus0+alcoholstatus0+diet0+MET插补0+
          central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0+grip_BMI4,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果

test <- bmtcrr %>% 
  filter(Event_Status==1)
table(test$grip_BMI4)



#握力：计算差值三分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    grip_BMI_diff = grip_BMI1 - grip_BMI0,  # 计算差值
    grip_BMI3 = ntile(grip_BMI_diff, 3)  # 根据差值进行四分位分组
  )

# 检查分组情况
table(final_nafld11$grip_BMI3)
bmtcrr <- final_nafld11
bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$Event_Status<- factor(bmtcrr$Event_Status) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$grip_BMI3<- factor(bmtcrr$grip_BMI3)
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~age0+sex+race+
          Townsend+education+smokingstatus0+alcoholstatus0+diet0+MET插补0+
          central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0+grip_BMI3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果

test <- bmtcrr %>% 
  filter(Event_Status==1)
table(test$grip_BMI3)



#计算差值五分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    muscle_change_diff = ALST_BMI_1 - ALST_BMI_0,  # 计算差值
    muscle_change = ntile(muscle_change_diff, 5)  # 根据差值进行五分位分组
  )

# 检查分组情况
table(final_nafld11$muscle_change)
bmtcrr <- final_nafld11
bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$Event_Status<- factor(bmtcrr$Event_Status) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change<- factor(bmtcrr$muscle_change)
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~age0+sex+race+muscle_change,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果

test <- bmtcrr %>% 
  filter(Event_Status==1)
table(test$muscle_change)


#计算差值三分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    muscle_change_diff = ALST_BMI_1 - ALST_BMI_0,  # 计算差值
    muscle_change = ntile(muscle_change_diff, 3)  # 根据差值进行三分位分组
  )

# 单变量相关性分析
vars <- c("age0", "sex", "race", "Townsend", "education", "smokingstatus0",
          "alcoholstatus0",  "MET插补0", "central_obesity0",
          "T2DM0_all", "HBP_all", "low_HDL0", "high_TG0")

# 创建结果存储框架
results <- data.frame(Variable = character(),
                      Estimate = numeric(),
                      P_value = numeric(),
                      stringsAsFactors = FALSE)

# 循环进行单变量回归分析
for (var in vars) {
  formula <- as.formula(paste("muscle_change ~", var))
  model <- lm(formula, data = final_nafld11)
  
  # 提取结果
  summary_table <- coef(summary(model))
  
  # 仅保留自变量结果（排除截距项）
  if (nrow(summary_table) > 1) {
    new_row <- data.frame(
      Variable = var,
      Estimate = summary_table[2, 1],
      P_value = summary_table[2, 4]
    )
    results <- rbind(results, new_row)
  }
}

# 显示结果
print(results)

# 可选：添加星号标记显著性
results$Significance <- cut(results$P_value,
                            breaks = c(-Inf, 0.001, 0.01, 0.05, 0.1, Inf),
                            labels = c("***", "**", "*", ".", " "))

# 按p值排序
results <- results[order(results$P_value), ]

# 打印美化后的结果
print(results[, c("Variable", "Estimate", "P_value", "Significance")])




# 检查分组情况
table(final_nafld11$muscle_change)
bmtcrr <- final_nafld11
bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$Event_Status<- factor(bmtcrr$Event_Status) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change<- factor(bmtcrr$muscle_change)
m1<-FGR(Hist(Follow_Up_Time_Years,Event_Status)~age0+sex+race+Townsend+education+smokingstatus0+alcoholstatus0+MET插补0+
          +central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0+muscle_change,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果

test <- bmtcrr %>% 
  filter(Event_Status==1)
table(test$muscle_change)


library(tidycmprsk)
library(gtsummary)

mul1<-tidycmprsk::crr(Surv(Follow_Up_Time_Years,Event_Status)~age0+sex+race+muscle_change,data=bmtcrr,
                       failcode=1,cencode=0)
mul1
#HR等结果
table1<-tidy(mul1);table1#回归系数和p值↓


library(tidycmprsk)
tria <- bmtcrr
out<-cuminc(Surv(Follow_Up_Time_Years,Event_Status)~muscle_change,trial)
out
library(tidycmprsk)
data(trial)
head(trial)
trial <- bmtcrr
crr_mod <- crr(Surv(Follow_Up_Time_Years, Event_Status) ~ age0+sex+race + muscle_change, trial)
crr_mod

tbl <- 
  crr_mod %>%
  gtsummary::tbl_regression(exponentiate = TRUE) %>%
  gtsummary::add_global_p() %>%
  add_n(location = "level")

#gtsummary::inline_text(tbl, variable = age)

cuminc(Surv(Follow_Up_Time_Years, Event_Status) ~ 1, trial)
library(ggsurvfit)
#> Loading required package: ggplot2

cuminc(Surv(Follow_Up_Time_Years, Event_Status) ~ muscle_change, trial) %>%
  ggcuminc() +
  add_confidence_interval() +
  add_risktable() +
  scale_ggsurvfit(x_scales = list(breaks = seq(0, 24, by = 6)))
#> Plotting outcome "death from cancer".
tbl <- 
  cuminc(Surv(Follow_Up_Time_Years, Event_Status) ~ muscle_change, trial) %>%
  tbl_cuminc(times = c(2.5, 5), label_header = "**Year {time}**") %>%
  add_p() %>%
  add_n()
print(tbl)

table(bmtcrr$muscle_change)



#心血管风险
#计算差值四分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    muscle_change_diff = ALST_BMI_1 - ALST_BMI_0,  # 计算差值
    muscle_change4 = ntile(muscle_change_diff, 4)  # 根据差值进行四分位分组
  )

table(final_nafld11$AF_incidence)
table(final_nafld11$AF1)
bmtcrr <- final_nafld11 %>% 
  filter(AF1!=1) %>% 
  filter(AF_incidence!=99)

table(bmtcrr$AF_incidence)
summary(bmtcrr$AF_followtime_new)
# 检查分组情况
table(final_nafld11$muscle_change4)

bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$AF_incidence<- factor(bmtcrr$AF_incidence) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change4<- factor(bmtcrr$muscle_change4)

#未校正
m1<-FGR(Hist(AF_followtime_new,AF_incidence)~muscle_change4,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果




#心血管风险
#计算差值三分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    muscle_change_diff = ALST_BMI_1 - ALST_BMI_0,  # 计算差值
    muscle_change3 = ntile(muscle_change_diff, 3)  # 根据差值进行四分位分组
  )

table(final_nafld11$AF_incidence)
table(final_nafld11$AF1)
bmtcrr <- final_nafld11 %>% 
  filter(AF1!=1) %>% 
  filter(AF_incidence!=99)

table(bmtcrr$AF_incidence)
summary(bmtcrr$AF_followtime_new)
# 检查分组情况
table(final_nafld11$muscle_change3)

bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$AF_incidence<- factor(bmtcrr$AF_incidence) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change3<- factor(bmtcrr$muscle_change3)

#未校正
m1<-FGR(Hist(AF_followtime_new,AF_incidence)~muscle_change3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果



#计算差值三分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    muscle_change_diff = ALST_BMI_1 - ALST_BMI_0,  # 计算差值
    muscle_change3 = ntile(muscle_change_diff, 3)  # 根据差值进行四分位分组
  )

table(final_nafld11$AF_incidence)
table(final_nafld11$AF1)
bmtcrr <- final_nafld11 %>% 
  filter(AF1!=1) %>% 
  filter(AF_incidence!=99)

table(bmtcrr$AF_incidence)
summary(bmtcrr$AF_followtime_new)
# 检查分组情况
table(final_nafld11$muscle_change3)

bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$AF_incidence<- factor(bmtcrr$AF_incidence) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change3<- factor(bmtcrr$muscle_change3)

#校正age0+sex+race
m1<-FGR(Hist(AF_followtime_new,AF_incidence)~age0+sex+race+muscle_change3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果




#校正age0+sex+race+central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0
table(bmtcrr$central_obesity0)
table(bmtcrr$T2DM0_all)
table(bmtcrr$HBP_all)
table(bmtcrr$low_HDL0)
table(bmtcrr$high_TG0)
table(bmtcrr$education_new)
table(bmtcrr$alcohol_new0)
summary(bmtcrr$MET插补0)
m1<-FGR(Hist(AF_followtime_new,AF_incidence)~age0+sex+race+
          Townsend+education_new+smokingstatus0+alcohol_new0+MET插补0+
          central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0+muscle_change3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果





#心血管风险CVD
#计算差值三分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    muscle_change_diff = ALST_BMI_1 - ALST_BMI_0,  # 计算差值
    muscle_change3 = ntile(muscle_change_diff, 3)  # 根据差值进行四分位分组
  )

table(final_nafld11$CVD_incidence)
table(final_nafld11$CVD1)
bmtcrr <- final_nafld11 %>% 
  filter(CVD1!=1) %>% 
  filter(CVD_incidence!=99)

table(bmtcrr$CVD_incidence)
summary(bmtcrr$CVD_followtime_new)
# 检查分组情况
table(final_nafld11$muscle_change3)

bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$CVD_incidence<- factor(bmtcrr$CVD_incidence) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change3<- factor(bmtcrr$muscle_change3)

#未校正
m1<-FGR(Hist(CVD_followtime_new,CVD_incidence)~muscle_change3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果



#计算差值三分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    muscle_change_diff = ALST_BMI_1 - ALST_BMI_0,  # 计算差值
    muscle_change3 = ntile(muscle_change_diff, 3)  # 根据差值进行四分位分组
  )

table(final_nafld11$CVD_incidence)
table(final_nafld11$CVD1)
bmtcrr <- final_nafld11 %>% 
  filter(CVD1!=1) %>% 
  filter(CVD_incidence!=99)

table(bmtcrr$CVD_incidence)
summary(bmtcrr$CVD_followtime_new)
# 检查分组情况
table(final_nafld11$muscle_change3)

bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$CVD_incidence<- factor(bmtcrr$CVD_incidence) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change3<- factor(bmtcrr$muscle_change3)

#校正age0+sex+race
m1<-FGR(Hist(CVD_followtime_new,CVD_incidence)~age0+sex+race+muscle_change3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果




#校正age0+sex+race+central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0
table(bmtcrr$central_obesity0)
table(bmtcrr$T2DM0_all)
table(bmtcrr$HBP_all)
table(bmtcrr$low_HDL0)
table(bmtcrr$high_TG0)
table(bmtcrr$education_new)
table(bmtcrr$alcohol_new0)
summary(bmtcrr$MET插补0)
m1<-FGR(Hist(CVD_followtime_new,CVD_incidence)~age0+sex+race+
          Townsend+education_new+smokingstatus0+alcohol_new0+MET插补0+
          central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0+muscle_change3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果





#心血管风险Stroke
#计算差值三分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    muscle_change_diff = ALST_BMI_1 - ALST_BMI_0,  # 计算差值
    muscle_change3 = ntile(muscle_change_diff, 3)  # 根据差值进行四分位分组
  )

table(final_nafld11$Stroke_incidence)
table(final_nafld11$Stroke1)
bmtcrr <- final_nafld11 %>% 
  filter(Stroke1!=1) %>% 
  filter(Stroke_incidence!=99)

table(bmtcrr$Stroke_incidence)
summary(bmtcrr$Stroke_followtime_new)
# 检查分组情况
table(final_nafld11$muscle_change3)

bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$Stroke_incidence<- factor(bmtcrr$Stroke_incidence) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change3<- factor(bmtcrr$muscle_change3)

#未校正
m1<-FGR(Hist(Stroke_followtime_new,Stroke_incidence)~muscle_change3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果



#计算差值三分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    muscle_change_diff = ALST_BMI_1 - ALST_BMI_0,  # 计算差值
    muscle_change3 = ntile(muscle_change_diff, 3)  # 根据差值进行四分位分组
  )

table(final_nafld11$Stroke_incidence)
table(final_nafld11$Stroke1)
bmtcrr <- final_nafld11 %>% 
  filter(Stroke1!=1) %>% 
  filter(Stroke_incidence!=99)

table(bmtcrr$Stroke_incidence)
summary(bmtcrr$Stroke_followtime_new)
# 检查分组情况
table(final_nafld11$muscle_change3)

bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$Stroke_incidence<- factor(bmtcrr$Stroke_incidence) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change3<- factor(bmtcrr$muscle_change3)

#校正age0+sex+race
m1<-FGR(Hist(Stroke_followtime_new,Stroke_incidence)~age0+sex+race+muscle_change3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果




#校正age0+sex+race+central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0
table(bmtcrr$central_obesity0)
table(bmtcrr$T2DM0_all)
table(bmtcrr$HBP_all)
table(bmtcrr$low_HDL0)
table(bmtcrr$high_TG0)
table(bmtcrr$education_new)
table(bmtcrr$alcohol_new0)
summary(bmtcrr$MET插补0)
m1<-FGR(Hist(Stroke_followtime_new,Stroke_incidence)~age0+sex+race+
          Townsend+education_new+smokingstatus0+alcohol_new0+MET插补0+
          central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0+muscle_change3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果





#心血管风险CHD
#计算差值三分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    muscle_change_diff = ALST_BMI_1 - ALST_BMI_0,  # 计算差值
    muscle_change3 = ntile(muscle_change_diff, 3)  # 根据差值进行四分位分组
  )

table(final_nafld11$CHD_incidence)
table(final_nafld11$CHD1)
bmtcrr <- final_nafld11 %>% 
  filter(CHD1!=1) %>% 
  filter(CHD_incidence!=99)

table(bmtcrr$CHD_incidence)
summary(bmtcrr$CHD_followtime_new)
# 检查分组情况
table(final_nafld11$muscle_change3)

bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$CHD_incidence<- factor(bmtcrr$CHD_incidence) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change3<- factor(bmtcrr$muscle_change3)

#未校正
m1<-FGR(Hist(CHD_followtime_new,CHD_incidence)~muscle_change3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果



#计算差值三分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    muscle_change_diff = ALST_BMI_1 - ALST_BMI_0,  # 计算差值
    muscle_change3 = ntile(muscle_change_diff, 3)  # 根据差值进行四分位分组
  )

table(final_nafld11$CHD_incidence)
table(final_nafld11$CHD1)
bmtcrr <- final_nafld11 %>% 
  filter(CHD1!=1) %>% 
  filter(CHD_incidence!=99)

table(bmtcrr$CHD_incidence)
summary(bmtcrr$CHD_followtime_new)
# 检查分组情况
table(final_nafld11$muscle_change3)

bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$CHD_incidence<- factor(bmtcrr$CHD_incidence) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change3<- factor(bmtcrr$muscle_change3)

#校正age0+sex+race
m1<-FGR(Hist(CHD_followtime_new,CHD_incidence)~age0+sex+race+muscle_change3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果




#校正age0+sex+race+central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0
table(bmtcrr$central_obesity0)
table(bmtcrr$T2DM0_all)
table(bmtcrr$HBP_all)
table(bmtcrr$low_HDL0)
table(bmtcrr$high_TG0)
table(bmtcrr$education_new)
table(bmtcrr$alcohol_new0)
summary(bmtcrr$MET插补0)
m1<-FGR(Hist(CHD_followtime_new,CHD_incidence)~age0+sex+race+
          Townsend+education_new+smokingstatus0+alcohol_new0+MET插补0+
          central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0+muscle_change3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果




#心血管风险HF
#计算差值三分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    muscle_change_diff = ALST_BMI_1 - ALST_BMI_0,  # 计算差值
    muscle_change3 = ntile(muscle_change_diff, 3)  # 根据差值进行四分位分组
  )

table(final_nafld11$HF_incidence)
table(final_nafld11$HF1)
bmtcrr <- final_nafld11 %>% 
  filter(HF1!=1) %>% 
  filter(HF_incidence!=99)

table(bmtcrr$HF_incidence)
summary(bmtcrr$HF_followtime_new)
# 检查分组情况
table(final_nafld11$muscle_change3)

bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$HF_incidence<- factor(bmtcrr$HF_incidence) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change3<- factor(bmtcrr$muscle_change3)

#未校正
m1<-FGR(Hist(HF_followtime_new,HF_incidence)~muscle_change3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果



#计算差值三分位
final_nafld11 <- final_nafld1 %>%
  mutate(
    muscle_change_diff = ALST_BMI_1 - ALST_BMI_0,  # 计算差值
    muscle_change3 = ntile(muscle_change_diff, 3)  # 根据差值进行四分位分组
  )

table(final_nafld11$HF_incidence)
table(final_nafld11$HF1)
bmtcrr <- final_nafld11 %>% 
  filter(HF1!=1) %>% 
  filter(HF_incidence!=99)

table(bmtcrr$HF_incidence)
summary(bmtcrr$HF_followtime_new)
# 检查分组情况
table(final_nafld11$muscle_change3)

bmtcrr$sex <- factor(bmtcrr$sex) #将Sex变量因子化
bmtcrr$HF_incidence<- factor(bmtcrr$HF_incidence) #将D变量因子化
bmtcrr$race <- factor(bmtcrr$race) #将Phase变量因子化
bmtcrr$muscle_change3<- factor(bmtcrr$muscle_change3)

#校正age0+sex+race
m1<-FGR(Hist(HF_followtime_new,HF_incidence)~age0+sex+race+muscle_change3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果




#校正age0+sex+race+central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0
table(bmtcrr$central_obesity0)
table(bmtcrr$T2DM0_all)
table(bmtcrr$HBP_all)
table(bmtcrr$low_HDL0)
table(bmtcrr$high_TG0)
table(bmtcrr$education_new)
table(bmtcrr$alcohol_new0)
summary(bmtcrr$MET插补0)
m1<-FGR(Hist(HF_followtime_new,HF_incidence)~age0+sex+race+
          Townsend+education_new+smokingstatus0+alcohol_new0+MET插补0+
          central_obesity0+T2DM0_all+HBP_all+low_HDL0+high_TG0+muscle_change3,cause=1,data=bmtcrr)
a<-summary(m1)
res<-cbind(a$coef[,c(1,3,5)],a$conf.int[,c(1,3,4)])
round(res,4)#结果